# Bootstrapped confidence intervals used in Table 3

library(boot)
library(tidyverse)

# Load the merged data set and keep only rows whose FAMILY_INCOME exceeds
# 10000. dplyr::filter() also drops rows where the condition is NA, so rows
# with a missing FAMILY_INCOME are removed here as well.
df <- read.csv("merged_final.csv") %>%
  filter(FAMILY_INCOME > 10000)

# Bootstrap statistic: adjusted R-squared of a linear model fit on a resample.
#
# Arguments:
#   formula  model formula handed to lm()
#   data     data frame holding the variables named in `formula`
#   indices  row indices selecting the resample (supplied by boot())
#
# Returns the adjusted R-squared of the model fit to data[indices, ].
adj_r_sqr <- function(formula, data, indices) {
  resampled <- data[indices, ]
  model_summary <- summary(lm(formula, data = resampled))
  model_summary$adj.r.squared
}

# Bootstrap (R = 300 resamples of the rows of df) the adjusted R-squared of
# the SAT score regressed on the dictionary feature columns listed below.
# `formula` is not an argument of boot(); it is forwarded through `...` to
# adj_r_sqr.
#
# To compare SAT Math or EBRW, replace "RSAT_TOTAL_SCORE" with "RSAT_MATH_SCORE"
# or "RSAT_EBRW"
#
# The seed is fixed so the resampling below is reproducible.
# NOTE: the original call was followed by a stray, unmatched ")" that made the
# script fail to parse; it has been removed.
set.seed(1993)
dict_boot <- boot(
  data = df, statistic = adj_r_sqr, R = 300,
  formula = RSAT_TOTAL_SCORE ~ WC + Analytic + Clout +
    Authentic + Tone + WPS + Sixltr + Dic + function. + pronoun +
    ppron + i + we + you + shehe + they + ipron + article +
    prep + auxverb + adverb + conj + negate + verb + adj + compare +
    interrog + number + quant + affect + posemo + negemo + anx +
    anger + sad + social + family + friend + female + male + cogproc +
    insight + cause + discrep + tentat + certain + differ + percept +
    see + hear + feel + bio + body + health + sexual + ingest + drives +
    affiliation + achieve + power + reward + risk + focuspast +
    focuspresent + focusfuture + relativ + motion + space +
    time + work + leisure + home + money + relig + death + informal +
    swear + netspeak + assent + nonflu + filler + AllPunc + Period +
    Comma + Colon + SemiC + QMark + Exclam + Quote + Apostro +
    Parenth + OtherP
)

# Log-transform the topic columns in place.
# NOTE(review): assumes columns 6:75 of df are exactly the 70 topic columns
# named in the formula below -- confirm against merged_final.csv.
df[, 6:75] <- log(df[, 6:75])

# Bootstrap (R = 300 resamples of the rows of df) the adjusted R-squared of
# the SAT score regressed on the logged topic columns. `formula` is forwarded
# by boot() to adj_r_sqr.
topic_boot <- boot(
  data = df, statistic = adj_r_sqr, R = 300,
  formula = RSAT_TOTAL_SCORE ~
    Winning_Competitions +
    Math +
    AP_Classes +
    Work_And_Goals +
    Camping_Swimming +
    Social_Anxiety +
    Gendered_Activities +
    Fashion_Style +
    Family_Members +
    Medical_Experiences +
    Helping_Others +
    Despite_Words +
    Latinx_Family_Issues +
    Education_Opportunity +
    Classroom_Experiences +
    Youth_Volunteering +
    Reading_Writing +
    Making_Planning +
    Visual_Art +
    Travel +
    Leadership_Skills +
    Seeking_Answers +
    Mental_Health +
    Outside_School_Programs +
    Volunteer_Cleaning +
    Work_Experiences +
    Family_Death +
    Motivations_Goals +
    Psychology_Understanding +
    Group_Leadership +
    Sports_Experiences +
    World_Histories +
    China +
    Language_Experiences +
    Cooking +
    Civic_Experiences +
    Time_Management +
    Sensory_Experiences +
    Sociocultural_Diversity +
    Business_Economics +
    Performance_Art +
    Computer_Science +
    Photography +
    School_Activities +
    Humor_Storytelling +
    Group_Assignments +
    Work_Money +
    Process_Words +
    Boy_Scouts +
    Video_Film +
    Family_Church +
    Building_Engines +
    Human_Nature +
    Music +
    Life_Reflections +
    Time_Cycles +
    Life_Challenges +
    Sensory_Responses +
    HS_Years +
    Sports_General +
    School_Grades +
    Dancing_Art +
    Community_Service +
    Preference_Words +
    Achievement_Words +
    Puzzles_Problems +
    Chemistry_Biology +
    Tutoring_Groups +
    Physics +
    New_Exepriences
)


# 2.5th and 97.5th percentiles of the bootstrap replicates, i.e. a 95%
# percentile interval for each model's adjusted R-squared.
quantile(topic_boot$t, probs = c(0.025, 0.975))
quantile(dict_boot$t, probs = c(0.025, 0.975))

# Adjusted R-squared from regressing family income on columns 76:167 of df
# (presumably the dictionary feature columns -- confirm against the data).
# Or replace as.matrix(df[,76:167]) with as.matrix(df[,6:75]) for topics
income_fit <- lm(FAMILY_INCOME ~ as.matrix(df[, 76:167]), data = df)
summary(income_fit)$adj.r.squared


################################################################################
# Decile Bootstrapped CI
# 
# Generates values used in Figure 3 (stored in Data_Files as "liwc.txt" and
# "topics.txt")
#

# Decile boundaries of family income: the 10th, 20th, ..., 90th percentiles
# of the non-missing values.
fi_cuts <- quantile(df$FAMILY_INCOME, probs = (1:9) / 10, na.rm = TRUE)
df$fi <- df$FAMILY_INCOME

# Label every row with its income decile (1 = lowest, 10 = highest).
# With left.open = TRUE, findInterval() counts the cut points strictly below
# fi, so a value exactly equal to a cut point falls in the lower decile.
df_perc <- df %>%
  mutate(percentile = findInterval(fi, fi_cuts, left.open = TRUE) + 1)

# To test each decile, replace "10" with the decile of interest
specific_decile <- df_perc %>%
  filter(percentile == 10)

# Bootstrap (R = 300) the adjusted R-squared of the dictionary-feature model
# within the selected income decile. The rows resampled are those of
# `specific_decile`; passing the full `df_perc` (as this call originally did)
# ignores the decile filter and returns the same pooled result for every
# decile. `formula` is forwarded by boot() to adj_r_sqr.
liwc_boot_perc <- boot(
  data = specific_decile, statistic = adj_r_sqr, R = 300,
  formula = RSAT_TOTAL_SCORE ~ WC + Analytic + Clout +
    Authentic + Tone + WPS + Sixltr + Dic + function. + pronoun +
    ppron + i + we + you + shehe + they + ipron + article +
    prep + auxverb + adverb + conj + negate + verb + adj + compare +
    interrog + number + quant + affect + posemo + negemo + anx +
    anger + sad + social + family + friend + female + male + cogproc +
    insight + cause + discrep + tentat + certain + differ + percept +
    see + hear + feel + bio + body + health + sexual + ingest + drives +
    affiliation + achieve + power + reward + risk + focuspast +
    focuspresent + focusfuture + relativ + motion + space +
    time + work + leisure + home + money + relig + death + informal +
    swear + netspeak + assent + nonflu + filler + AllPunc + Period +
    Comma + Colon + SemiC + QMark + Exclam + Quote + Apostro +
    Parenth + OtherP
)

# Bootstrap (R = 300) the adjusted R-squared of the topic model within the
# selected income decile. `formula` is forwarded by boot() to adj_r_sqr.
#
# Two fixes relative to the original call:
#   * The topic columns (6:75) are NOT logged again here. When the script is
#     run top to bottom they were already log-transformed in `df` before
#     `df_perc` and `specific_decile` were derived from it, so a second log()
#     would compute log(log(x)).
#   * The rows resampled are those of `specific_decile`, not the full
#     `df_perc`, so the interval is actually specific to the chosen decile.
topic_boot_perc <- boot(
  data = specific_decile, statistic = adj_r_sqr, R = 300,
  formula = RSAT_TOTAL_SCORE ~
    Winning_Competitions +
    Math +
    AP_Classes +
    Work_And_Goals +
    Camping_Swimming +
    Social_Anxiety +
    Gendered_Activities +
    Fashion_Style +
    Family_Members +
    Medical_Experiences +
    Helping_Others +
    Despite_Words +
    Latinx_Family_Issues +
    Education_Opportunity +
    Classroom_Experiences +
    Youth_Volunteering +
    Reading_Writing +
    Making_Planning +
    Visual_Art +
    Travel +
    Leadership_Skills +
    Seeking_Answers +
    Mental_Health +
    Outside_School_Programs +
    Volunteer_Cleaning +
    Work_Experiences +
    Family_Death +
    Motivations_Goals +
    Psychology_Understanding +
    Group_Leadership +
    Sports_Experiences +
    World_Histories +
    China +
    Language_Experiences +
    Cooking +
    Civic_Experiences +
    Time_Management +
    Sensory_Experiences +
    Sociocultural_Diversity +
    Business_Economics +
    Performance_Art +
    Computer_Science +
    Photography +
    School_Activities +
    Humor_Storytelling +
    Group_Assignments +
    Work_Money +
    Process_Words +
    Boy_Scouts +
    Video_Film +
    Family_Church +
    Building_Engines +
    Human_Nature +
    Music +
    Life_Reflections +
    Time_Cycles +
    Life_Challenges +
    Sensory_Responses +
    HS_Years +
    Sports_General +
    School_Grades +
    Dancing_Art +
    Community_Service +
    Preference_Words +
    Achievement_Words +
    Puzzles_Problems +
    Chemistry_Biology +
    Tutoring_Groups +
    Physics +
    New_Exepriences
)
        
# 2.5th and 97.5th percentiles of the bootstrap replicates (95% percentile
# interval) for the dictionary and topic bootstraps of the decile section.
quantile(liwc_boot_perc$t, probs = c(0.025, 0.975))
quantile(topic_boot_perc$t, probs = c(0.025, 0.975))


